#download packages
require(ggplot2)
library(tidyverse)
library(dplyr)
require(car)
require(MASS)
require(boot)
library(lattice) # alternatively can use package ZIM for zero-inflated models
library(lmtest)
library(MASS)
library(tidyverse)
library(easystats)
library(ggeffects)
library(ggplot2)
library(scales)
library(gridExtra)
library(cowplot)
p <- read_csv("DOH.csv")
p
## # A tibble: 9,867 × 32
## Post_oldid Real_ID date COVID_Post RiskFactor_2_CTA SocialDisparities_3_…¹
## <chr> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 1 2069 9/22/20 1 88 88
## 2 2 2070 8/27/20 1 88 88
## 3 3 2071 12/29/… 1 88 88
## 4 4 2072 9/29/20 1 88 88
## 5 5 2073 10/6/20 1 88 88
## 6 6 2074 12/31/… 1 88 88
## 7 7 403 4/2/20 1 88 88
## 8 8 5425 12/1/20 1 88 88
## 9 9 5842 12/20/… 1 88 88
## 10 10 2075 3/5/20 1 88 88
## # ℹ 9,857 more rows
## # ℹ abbreviated name: ¹SocialDisparities_3_CTA
## # ℹ 26 more variables: Debunk_4_CTA <dbl>, UncertaintyReduction_5_CTA <dbl>,
## # Testing_6_CTA <dbl>, Vaccine_7_CTA <dbl>, Mental_8_YesNo <dbl>,
## # Emotion_9_Number <dbl>, Praise_10_Number <dbl>, Pop_11_Number <dbl>,
## # Language_12_Number <dbl>, Action_13_CTA <dbl>, Positive_14a_Number <dbl>,
## # Negative_14b_Number <dbl>, Individual_14c_Number <dbl>, …
# Install and load the openxlsx package
#install.packages("openxlsx")
#library(openxlsx)
# Assuming p is your dataframe
# Replace this with your actual dataframe
# For example, p <- read.csv('your_data.csv') or p <- data.frame(...)
# Write the dataframe to an Excel file
#write.xlsx(p, file = "DOH_Final.xlsx")
#Change variable names
p1 <-
p %>%
rename(State_recode = `State_ recode`,
White_percent = `White_\bpercent`,
Risk_Factor ='RiskFactor_2_CTA',
Social_Disparities = 'SocialDisparities_3_CTA',
Debunk_MI = 'Debunk_4_CTA',
Uncertainty_Reduction = 'UncertaintyReduction_5_CTA',
Testing = 'Testing_6_CTA',
Vaccine = 'Vaccine_7_CTA',
Pre_Action = 'Action_13_CTA',
Positive_eff = 'Positive_14a_Number',
Negative_eff = 'Negative_14b_Number',
Individual_eff = 'Individual_14c_Number',
Collective_eff = 'Collective_14d_Number',
Prosperity_ranking = 'ProspertiyRanking',
Minority_percent = 'minority_percent'
)
#Frequency with 88
# Plotting frequency distribution for each variable
for (col in names(p1)) {
if (is.numeric(p1[[col]])) {
plot <- ggplot(p1, aes_string(x = as.name(col))) +
geom_bar() +
labs(title = paste("Frequency Distribution of", col),
x = col,
y = "Frequency")
print(plot)
# Save or print the plot as needed
# ggsave(paste("plot_", col, ".png", sep=""), plot = plot, device = "png")
}
}
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 422 rows containing non-finite values (`stat_count()`).
p4 <-
p1 %>%
mutate_all(~ ifelse(. %in% c('88', '888', "99", "999"), 0, .))
p4
## # A tibble: 9,867 × 32
## Post_oldid Real_ID date COVID_Post Risk_Factor Social_Disparities Debunk_MI
## <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 2069 9/22/… 1 0 0 0
## 2 2 2070 8/27/… 1 0 0 0
## 3 3 2071 12/29… 1 0 0 0
## 4 4 2072 9/29/… 1 0 0 0
## 5 5 2073 10/6/… 1 0 0 0
## 6 6 2074 12/31… 1 0 0 0
## 7 7 403 4/2/20 1 0 0 0
## 8 8 5425 12/1/… 1 0 0 0
## 9 9 5842 12/20… 1 0 0 0
## 10 10 2075 3/5/20 1 0 0 0
## # ℹ 9,857 more rows
## # ℹ 25 more variables: Uncertainty_Reduction <dbl>, Testing <dbl>,
## # Vaccine <dbl>, Mental_8_YesNo <dbl>, Emotion_9_Number <dbl>,
## # Praise_10_Number <dbl>, Pop_11_Number <dbl>, Language_12_Number <dbl>,
## # Pre_Action <dbl>, Positive_eff <dbl>, Negative_eff <dbl>,
## # Individual_eff <dbl>, Collective_eff <dbl>, State_recode <dbl>,
## # Gov_poli <dbl>, Trifecta <dbl>, White_percent <dbl>, …
table(p4$Uncertainty_Reduction)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
## 1349 4561 1733 941 473 306 199 102 68 24 61 22 3 4 4 5
## 16 17 18 25 26
## 5 1 4 1 1
table(p4$Testing)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
## 8741 644 279 81 32 12 17 4 2 4 28 12 1 3 1 3
## 18 26
## 2 1
table(p4$Vaccine)
##
## 0 1 2 3 4 5 6 7 8 9 15
## 9394 272 134 26 11 10 10 5 2 1 2
#frequency without 88
# Plotting frequency distribution for each variable
for (col in names(p4)) {
if (is.numeric(p4[[col]])) {
plot1 <- ggplot(p4, aes_string(x = as.name(col))) +
geom_bar() +
labs(title = paste("Frequency Distribution of", col),
x = col,
y = "Frequency")
print(plot1)
# Save or print the plot as needed
# ggsave(paste("plot_", col, ".png", sep=""), plot = plot, device = "png")
}
}
## Warning: Removed 422 rows containing non-finite values (`stat_count()`).
============ #Transform to bi-nominal only 1 and 0 for various variables
p3 <-
p1 %>%
mutate(Risk_Factor = ifelse(Risk_Factor %in% c('88', '888'), 0, 1)) %>%
mutate(Social_Disparities = ifelse(Social_Disparities %in% c('88', '888'), 0, 1)) %>%
mutate(Debunk_MI = ifelse(Debunk_MI %in% c('88', '888'), 0, 1)) %>%
mutate(Uncertainty_Reduction = ifelse(Uncertainty_Reduction %in% c('88', '888'), 0, 1))
p3
## # A tibble: 9,867 × 32
## Post_oldid Real_ID date COVID_Post Risk_Factor Social_Disparities Debunk_MI
## <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 2069 9/22/… 1 0 0 0
## 2 2 2070 8/27/… 1 0 0 0
## 3 3 2071 12/29… 1 0 0 0
## 4 4 2072 9/29/… 1 0 0 0
## 5 5 2073 10/6/… 1 0 0 0
## 6 6 2074 12/31… 1 0 0 0
## 7 7 403 4/2/20 1 0 0 0
## 8 8 5425 12/1/… 1 0 0 0
## 9 9 5842 12/20… 1 0 0 0
## 10 10 2075 3/5/20 1 0 0 0
## # ℹ 9,857 more rows
## # ℹ 25 more variables: Uncertainty_Reduction <dbl>, Testing <dbl>,
## # Vaccine <dbl>, Mental_8_YesNo <dbl>, Emotion_9_Number <dbl>,
## # Praise_10_Number <dbl>, Pop_11_Number <dbl>, Language_12_Number <dbl>,
## # Pre_Action <dbl>, Positive_eff <dbl>, Negative_eff <dbl>,
## # Individual_eff <dbl>, Collective_eff <dbl>, State_recode <dbl>,
## # Gov_poli <dbl>, Trifecta <dbl>, White_percent <dbl>, …
glimpse(p3)
## Rows: 9,867
## Columns: 32
## $ Post_oldid <chr> "1", "2", "3", "4", "5", "6", "7", "8", "9", "10…
## $ Real_ID <dbl> 2069, 2070, 2071, 2072, 2073, 2074, 403, 5425, 5…
## $ date <chr> "9/22/20", "8/27/20", "12/29/20", "9/29/20", "10…
## $ COVID_Post <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ Risk_Factor <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ Social_Disparities <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Debunk_MI <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Uncertainty_Reduction <dbl> 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, …
## $ Testing <dbl> 88, 88, 88, 88, 88, 88, 88, 88, 1, 88, 88, 88, 8…
## $ Vaccine <dbl> 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, …
## $ Mental_8_YesNo <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Emotion_9_Number <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Praise_10_Number <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ Pop_11_Number <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Language_12_Number <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, …
## $ Pre_Action <dbl> 0, 0, 0, 0, 0, 0, 7, 0, 1, 1, 1, 1, 0, 1, 1, 3, …
## $ Positive_eff <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 2, …
## $ Negative_eff <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Individual_eff <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Collective_eff <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 2, …
## $ State_recode <dbl> 5, 5, 5, 5, 5, 5, 2, 30, 39, 5, 5, 9, 6, 51, 51,…
## $ Gov_poli <dbl> 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, …
## $ Trifecta <dbl> 2, 2, 2, 2, 2, 2, 3, 2, 1, 2, 2, 1, 2, 2, 2, 3, …
## $ White_percent <dbl> 36.5, 36.5, 36.5, 36.5, 36.5, 36.5, 60.2, 54.6, …
## $ Minority_percent <dbl> 63.5, 63.5, 63.5, 63.5, 63.5, 63.5, 39.8, 45.4, …
## $ Poverty_percent <dbl> 11.8, 11.8, 11.8, 11.8, 11.8, 11.8, 10.2, 9.1, 1…
## $ Prosperity_ranking <dbl> 25, 25, 25, 25, 25, 25, 39, 12, 18, 25, 25, 31, …
## $ year <dbl> 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, …
## $ month <dbl> 9, 8, 12, 9, 10, 12, 4, 12, 12, 3, 4, 4, 12, 4, …
## $ day <dbl> 22, 27, 29, 29, 6, 31, 2, 1, 20, 5, 22, 17, 22, …
## $ state <chr> "CA", "CA", "CA", "CA", "CA", "CA", "AK", "NJ", …
## $ cases <dbl> 2860, 5352, 34167, 3143, 2602, 32295, 3, 4606, 0…
table(p3$Risk_Factor)
##
## 0 1
## 8524 1343
table(p3$Social_Disparities)
##
## 0 1
## 9691 176
table(p3$Debunk_MI)
##
## 0 1
## 9432 435
table(p3$Uncertainty_Reduction)
##
## 0 1
## 478 9389
table(p3$Positive_eff)
##
## 0 1 2 3 4 5 6 7 8
## 5875 2563 949 289 124 35 20 7 5
table(p3$Negative_eff)
##
## 0 1 2 3 5 6
## 9728 99 27 10 2 1
table(p3$Collective_eff)
##
## 0 1 2 3 4 5 6 7 8
## 6635 2315 622 186 64 24 13 5 3
table(p3$Individual_eff)
##
## 0 1 2 3 4 5 6 7
## 8523 1078 171 35 49 7 2 2
# Check the column names in p3
colnames(p3)
## [1] "Post_oldid" "Real_ID" "date"
## [4] "COVID_Post" "Risk_Factor" "Social_Disparities"
## [7] "Debunk_MI" "Uncertainty_Reduction" "Testing"
## [10] "Vaccine" "Mental_8_YesNo" "Emotion_9_Number"
## [13] "Praise_10_Number" "Pop_11_Number" "Language_12_Number"
## [16] "Pre_Action" "Positive_eff" "Negative_eff"
## [19] "Individual_eff" "Collective_eff" "State_recode"
## [22] "Gov_poli" "Trifecta" "White_percent"
## [25] "Minority_percent" "Poverty_percent" "Prosperity_ranking"
## [28] "year" "month" "day"
## [31] "state" "cases"
# Check the data types in p3 for relevant columns
str(p3[c("Risk_Factor", "Social_Disparities", "Debunk_MI", "Uncertainty_Reduction")])
## tibble [9,867 × 4] (S3: tbl_df/tbl/data.frame)
## $ Risk_Factor : num [1:9867] 0 0 0 0 0 0 0 0 0 0 ...
## $ Social_Disparities : num [1:9867] 0 0 0 0 0 0 0 0 0 0 ...
## $ Debunk_MI : num [1:9867] 0 0 0 0 0 0 0 0 0 0 ...
## $ Uncertainty_Reduction: num [1:9867] 1 1 1 1 1 1 0 1 1 1 ...
# Modify the code to select the columns
p2 <- p3 %>%
mutate(
Risk_Factor = as.factor(Risk_Factor),
Social_Disparities = as.factor(Social_Disparities),
Debunk_MI = as.factor(Debunk_MI),
Uncertainty_Reduction = as.factor(Uncertainty_Reduction)
)
p2
## # A tibble: 9,867 × 32
## Post_oldid Real_ID date COVID_Post Risk_Factor Social_Disparities Debunk_MI
## <chr> <dbl> <chr> <dbl> <fct> <fct> <fct>
## 1 1 2069 9/22/… 1 0 0 0
## 2 2 2070 8/27/… 1 0 0 0
## 3 3 2071 12/29… 1 0 0 0
## 4 4 2072 9/29/… 1 0 0 0
## 5 5 2073 10/6/… 1 0 0 0
## 6 6 2074 12/31… 1 0 0 0
## 7 7 403 4/2/20 1 0 0 0
## 8 8 5425 12/1/… 1 0 0 0
## 9 9 5842 12/20… 1 0 0 0
## 10 10 2075 3/5/20 1 0 0 0
## # ℹ 9,857 more rows
## # ℹ 25 more variables: Uncertainty_Reduction <fct>, Testing <dbl>,
## # Vaccine <dbl>, Mental_8_YesNo <dbl>, Emotion_9_Number <dbl>,
## # Praise_10_Number <dbl>, Pop_11_Number <dbl>, Language_12_Number <dbl>,
## # Pre_Action <dbl>, Positive_eff <dbl>, Negative_eff <dbl>,
## # Individual_eff <dbl>, Collective_eff <dbl>, State_recode <dbl>,
## # Gov_poli <dbl>, Trifecta <dbl>, White_percent <dbl>, …
# write a csv file of the p2
write.csv(p2, file = "p2.csv", row.names = FALSE)
===== NEW====
glimpse(p3)
## Rows: 9,867
## Columns: 32
## $ Post_oldid <chr> "1", "2", "3", "4", "5", "6", "7", "8", "9", "10…
## $ Real_ID <dbl> 2069, 2070, 2071, 2072, 2073, 2074, 403, 5425, 5…
## $ date <chr> "9/22/20", "8/27/20", "12/29/20", "9/29/20", "10…
## $ COVID_Post <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ Risk_Factor <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ Social_Disparities <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Debunk_MI <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Uncertainty_Reduction <dbl> 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, …
## $ Testing <dbl> 88, 88, 88, 88, 88, 88, 88, 88, 1, 88, 88, 88, 8…
## $ Vaccine <dbl> 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, …
## $ Mental_8_YesNo <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Emotion_9_Number <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Praise_10_Number <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ Pop_11_Number <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Language_12_Number <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, …
## $ Pre_Action <dbl> 0, 0, 0, 0, 0, 0, 7, 0, 1, 1, 1, 1, 0, 1, 1, 3, …
## $ Positive_eff <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 2, …
## $ Negative_eff <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Individual_eff <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Collective_eff <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 2, …
## $ State_recode <dbl> 5, 5, 5, 5, 5, 5, 2, 30, 39, 5, 5, 9, 6, 51, 51,…
## $ Gov_poli <dbl> 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, …
## $ Trifecta <dbl> 2, 2, 2, 2, 2, 2, 3, 2, 1, 2, 2, 1, 2, 2, 2, 3, …
## $ White_percent <dbl> 36.5, 36.5, 36.5, 36.5, 36.5, 36.5, 60.2, 54.6, …
## $ Minority_percent <dbl> 63.5, 63.5, 63.5, 63.5, 63.5, 63.5, 39.8, 45.4, …
## $ Poverty_percent <dbl> 11.8, 11.8, 11.8, 11.8, 11.8, 11.8, 10.2, 9.1, 1…
## $ Prosperity_ranking <dbl> 25, 25, 25, 25, 25, 25, 39, 12, 18, 25, 25, 31, …
## $ year <dbl> 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, …
## $ month <dbl> 9, 8, 12, 9, 10, 12, 4, 12, 12, 3, 4, 4, 12, 4, …
## $ day <dbl> 22, 27, 29, 29, 6, 31, 2, 1, 20, 5, 22, 17, 22, …
## $ state <chr> "CA", "CA", "CA", "CA", "CA", "CA", "AK", "NJ", …
## $ cases <dbl> 2860, 5352, 34167, 3143, 2602, 32295, 3, 4606, 0…
colnames(p2)
## [1] "Post_oldid" "Real_ID" "date"
## [4] "COVID_Post" "Risk_Factor" "Social_Disparities"
## [7] "Debunk_MI" "Uncertainty_Reduction" "Testing"
## [10] "Vaccine" "Mental_8_YesNo" "Emotion_9_Number"
## [13] "Praise_10_Number" "Pop_11_Number" "Language_12_Number"
## [16] "Pre_Action" "Positive_eff" "Negative_eff"
## [19] "Individual_eff" "Collective_eff" "State_recode"
## [22] "Gov_poli" "Trifecta" "White_percent"
## [25] "Minority_percent" "Poverty_percent" "Prosperity_ranking"
## [28] "year" "month" "day"
## [31] "state" "cases"
table(p3$Pre_Action)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
## 991 4691 1800 996 494 313 206 103 70 25 62 23 3 4 4 5
## 16 17 18 25 26 88
## 5 1 4 1 1 65
na_check_Risk_Factor <- any(is.na(p2$Risk_Factor))
na_check_Risk_Factor
## [1] FALSE
na_check_Social_Disparities <- any(is.na(p2$Social_Disparities))
na_check_Social_Disparities
## [1] FALSE
na_check_Debunk_MI <- any(is.na(p2$Debunk_MI))
na_check_Debunk_MI
## [1] FALSE
na_check_Uncertainty_Reduction <- any(is.na(p2$Uncertainty_Reduction))
na_check_Uncertainty_Reduction
## [1] FALSE
======== #RQ1: To what extent do the state health departments communicate about risks (i.e., uncertainty reduction, health risk factors, misinformation correction and disparities) related to COVID-19 on their Facebook accounts between January and December 2020?
table(p2$Risk_Factor)
##
## 0 1
## 8524 1343
table(p2$Social_Disparities)
##
## 0 1
## 9691 176
table(p2$Debunk_MI )
##
## 0 1
## 9432 435
table(p2$Uncertainty_Reduction)
##
## 0 1
## 478 9389
#RQ2:To what extent do the state health departments communicate outcome expectancies (2a)? What are the differences between the use of different types of expectancies (positive or negative and individual versus collective) on state health departments’ Facebook accounts between January and December 2020 (2b)?
# Assuming you have two vectors: p2$Positive_eff and p2$Negative_eff
# Perform a two-sample t-test
result_t_test1 <- t.test(p2$Positive_eff, p2$Negative_eff)
# Print the results
print(result_t_test1)
##
## Welch Two Sample t-test
##
## data: p2$Positive_eff and p2$Negative_eff
## t = 61.087, df = 10669, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.5894580 0.6285414
## sample estimates:
## mean of x mean of y
## 0.62916793 0.02016824
#parameters
model_parameters(result_t_test1)
## Welch Two Sample t-test
##
## Parameter1 | Parameter2 | Mean_Parameter1 | Mean_Parameter2 | Difference | 95% CI | t(10669.10) | p
## ------------------------------------------------------------------------------------------------------------------------
## p2$Positive_eff | p2$Negative_eff | 0.63 | 0.02 | 0.61 | [0.59, 0.63] | 61.09 | < .001
##
## Alternative hypothesis: true difference in means is not equal to 0
result_t_test2 <- t.test(p2$Collective_eff, p2$Individual_eff)
result_t_test2
##
## Welch Two Sample t-test
##
## data: p2$Collective_eff and p2$Individual_eff
## t = 28.889, df = 16935, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.2690552 0.3082226
## sample estimates:
## mean of x mean of y
## 0.4692409 0.1806020
#parameters
model_parameters(result_t_test2)
## Welch Two Sample t-test
##
## Parameter1 | Parameter2 | Mean_Parameter1 | Mean_Parameter2 | Difference | 95% CI | t(16934.97) | p
## ----------------------------------------------------------------------------------------------------------------------------
## p2$Collective_eff | p2$Individual_eff | 0.47 | 0.18 | 0.29 | [0.27, 0.31] | 28.89 | < .001
##
## Alternative hypothesis: true difference in means is not equal to 0
#RQ3:To what extent do state health departments incorporate a set of risk persuasion techniques (i.e., uncertainty reduction, health risk factors, misinformation correction and disparities) in their call-to-action regarding COVID-19 between January and December 2020?
df <- p3 %>%
mutate(
Pre_Action = ifelse(Pre_Action == 0 | Pre_Action == 88, 0, 1),
New_Risk = ifelse(Risk_Factor + Social_Disparities + Debunk_MI + Uncertainty_Reduction == 0, 0, 1),
New_Eff = ifelse(Positive_eff + Negative_eff + Individual_eff + Collective_eff == 0, 0, 1)
) %>%
filter(Pre_Action == 1)
df
## # A tibble: 8,811 × 34
## Post_oldid Real_ID date COVID_Post Risk_Factor Social_Disparities Debunk_MI
## <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 7 403 4/2/20 1 0 0 0
## 2 9 5842 12/20… 1 0 0 0
## 3 10 2075 3/5/20 1 0 0 0
## 4 11 2076 4/22/… 1 0 0 0
## 5 12 4003 4/17/… 1 0 0 0
## 6 14 9189 4/30/… 1 0 0 0
## 7 15 9190 4/30/… 1 0 0 0
## 8 16 404 12/12… 1 1 0 0
## 9 20 4004 5/15/… 1 0 0 0
## 10 21 9191 10/12… 1 0 0 0
## # ℹ 8,801 more rows
## # ℹ 27 more variables: Uncertainty_Reduction <dbl>, Testing <dbl>,
## # Vaccine <dbl>, Mental_8_YesNo <dbl>, Emotion_9_Number <dbl>,
## # Praise_10_Number <dbl>, Pop_11_Number <dbl>, Language_12_Number <dbl>,
## # Pre_Action <dbl>, Positive_eff <dbl>, Negative_eff <dbl>,
## # Individual_eff <dbl>, Collective_eff <dbl>, State_recode <dbl>,
## # Gov_poli <dbl>, Trifecta <dbl>, White_percent <dbl>, …
glimpse(df)
## Rows: 8,811
## Columns: 34
## $ Post_oldid <chr> "7", "9", "10", "11", "12", "14", "15", "16", "2…
## $ Real_ID <dbl> 403, 5842, 2075, 2076, 4003, 9189, 9190, 404, 40…
## $ date <chr> "4/2/20", "12/20/20", "3/5/20", "4/22/20", "4/17…
## $ COVID_Post <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ Risk_Factor <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Social_Disparities <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Debunk_MI <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, …
## $ Uncertainty_Reduction <dbl> 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, …
## $ Testing <dbl> 88, 1, 88, 88, 88, 1, 1, 88, 88, 88, 88, 88, 88,…
## $ Vaccine <dbl> 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 1, 8…
## $ Mental_8_YesNo <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Emotion_9_Number <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Praise_10_Number <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Pop_11_Number <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, …
## $ Language_12_Number <dbl> 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, …
## $ Pre_Action <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ Positive_eff <dbl> 0, 0, 1, 1, 0, 0, 0, 2, 0, 1, 1, 0, 1, 1, 0, 1, …
## $ Negative_eff <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Individual_eff <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, …
## $ Collective_eff <dbl> 0, 0, 1, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, …
## $ State_recode <dbl> 2, 39, 5, 5, 9, 51, 51, 2, 9, 51, 51, 39, 51, 51…
## $ Gov_poli <dbl> 1, 2, 2, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, …
## $ Trifecta <dbl> 3, 1, 2, 2, 1, 2, 2, 3, 1, 2, 2, 1, 2, 2, 2, 2, …
## $ White_percent <dbl> 60.2, 71.4, 36.5, 36.5, 53.2, 37.5, 37.5, 60.2, …
## $ Minority_percent <dbl> 39.8, 28.6, 63.5, 63.5, 46.8, 62.5, 62.5, 39.8, …
## $ Poverty_percent <dbl> 10.2, 11.6, 11.8, 11.8, 12.7, 14.1, 14.1, 10.2, …
## $ Prosperity_ranking <dbl> 39, 18, 25, 25, 31, 8, 8, 39, 31, 8, 8, 18, 8, 8…
## $ year <dbl> 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, …
## $ month <dbl> 4, 12, 3, 4, 4, 4, 4, 12, 5, 10, 10, 12, 8, 5, 1…
## $ day <dbl> 2, 20, 5, 22, 17, 30, 30, 12, 15, 12, 22, 11, 13…
## $ state <chr> "AK", "RI", "CA", "CA", "FL", "DC", "DC", "AK", …
## $ cases <dbl> 3, 0, 12, 1729, 1413, 217, 217, 520, 928, 38, 39…
## $ New_Risk <dbl> 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, …
## $ New_Eff <dbl> 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, …
table(df$Pre_Action)
##
## 1
## 8811
table(df$New_Risk)
##
## 0 1
## 233 8578
table(df$New_Eff)
##
## 0 1
## 4898 3913
# Define the APA-style theme
theme_apa <- function() {
theme_minimal() +
theme(
text = element_text(family = "Times New Roman", size = 10),
plot.title = element_text(size = 14, face = "bold", hjust = 0.5),
axis.title = element_text(size = 12, face = "bold"),
axis.text = element_text(size = 10),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
legend.position = "right",
legend.text = element_text(size = 12),
axis.line = element_line(color = "black")
)
}
Run Chi-Square for New_Risk
# Create a contingency table
contingency_New_Risk <- table(df$New_Risk)
# Perform the chi-square test
chi_square_New_Risk <- chisq.test(contingency_New_Risk)
# Print the results
chi_square_New_Risk
##
## Chi-squared test for given probabilities
##
## data: contingency_New_Risk
## X-squared = 7903.6, df = 1, p-value < 2.2e-16
model_parameters(chi_square_New_Risk)
## Chi-squared test for given probabilities
##
## Chi2(1) | p
## ----------------
## 7903.65 | < .001
# Create a data frame with the Chi-squared statistic and p-value
results <- data.frame(Test = "chi_square_New_Risk", Chi2 = chi_square_New_Risk$statistic, p = chi_square_New_Risk$p.value)
# Create a horizontal bar chart-like visualization
library(ggplot2)
ggplot(results, aes(x = Test, y = Chi2, fill = p < 0.001)) +
geom_bar(stat = "identity", width = 0.5) +
labs(title = "Chi-squared test for given probabilities",
y = "Chi-squared statistic") +
scale_fill_manual(values = c("TRUE" = "purple", "FALSE" = "blue")) +
coord_flip() +
theme_apa()
============
#RQ4: To what extent do state-level poverty, percentage of minority populations and government trifecta predict state health departments’ risk persuasion techniques (i.e., uncertainty reduction, health risk factors, misinformation correction and disparities) (4a), outcome expectancies (4b) and call-to-action (4c) on state health departments’ Facebook accounts between January and December 2020?
df1 <- p3 %>%
mutate(
Pre_Action = ifelse(Pre_Action == 0 | Pre_Action == 88, 0, 1),
New_Risk = ifelse(Risk_Factor + Social_Disparities + Debunk_MI + Uncertainty_Reduction == 0, 0, 1),
New_Eff = ifelse(Positive_eff + Negative_eff + Individual_eff + Collective_eff == 0, 0, 1)
)
df1
## # A tibble: 9,867 × 34
## Post_oldid Real_ID date COVID_Post Risk_Factor Social_Disparities Debunk_MI
## <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 2069 9/22/… 1 0 0 0
## 2 2 2070 8/27/… 1 0 0 0
## 3 3 2071 12/29… 1 0 0 0
## 4 4 2072 9/29/… 1 0 0 0
## 5 5 2073 10/6/… 1 0 0 0
## 6 6 2074 12/31… 1 0 0 0
## 7 7 403 4/2/20 1 0 0 0
## 8 8 5425 12/1/… 1 0 0 0
## 9 9 5842 12/20… 1 0 0 0
## 10 10 2075 3/5/20 1 0 0 0
## # ℹ 9,857 more rows
## # ℹ 27 more variables: Uncertainty_Reduction <dbl>, Testing <dbl>,
## # Vaccine <dbl>, Mental_8_YesNo <dbl>, Emotion_9_Number <dbl>,
## # Praise_10_Number <dbl>, Pop_11_Number <dbl>, Language_12_Number <dbl>,
## # Pre_Action <dbl>, Positive_eff <dbl>, Negative_eff <dbl>,
## # Individual_eff <dbl>, Collective_eff <dbl>, State_recode <dbl>,
## # Gov_poli <dbl>, Trifecta <dbl>, White_percent <dbl>, …
df1
## # A tibble: 9,867 × 34
## Post_oldid Real_ID date COVID_Post Risk_Factor Social_Disparities Debunk_MI
## <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 2069 9/22/… 1 0 0 0
## 2 2 2070 8/27/… 1 0 0 0
## 3 3 2071 12/29… 1 0 0 0
## 4 4 2072 9/29/… 1 0 0 0
## 5 5 2073 10/6/… 1 0 0 0
## 6 6 2074 12/31… 1 0 0 0
## 7 7 403 4/2/20 1 0 0 0
## 8 8 5425 12/1/… 1 0 0 0
## 9 9 5842 12/20… 1 0 0 0
## 10 10 2075 3/5/20 1 0 0 0
## # ℹ 9,857 more rows
## # ℹ 27 more variables: Uncertainty_Reduction <dbl>, Testing <dbl>,
## # Vaccine <dbl>, Mental_8_YesNo <dbl>, Emotion_9_Number <dbl>,
## # Praise_10_Number <dbl>, Pop_11_Number <dbl>, Language_12_Number <dbl>,
## # Pre_Action <dbl>, Positive_eff <dbl>, Negative_eff <dbl>,
## # Individual_eff <dbl>, Collective_eff <dbl>, State_recode <dbl>,
## # Gov_poli <dbl>, Trifecta <dbl>, White_percent <dbl>, …
glimpse(df1)
## Rows: 9,867
## Columns: 34
## $ Post_oldid <chr> "1", "2", "3", "4", "5", "6", "7", "8", "9", "10…
## $ Real_ID <dbl> 2069, 2070, 2071, 2072, 2073, 2074, 403, 5425, 5…
## $ date <chr> "9/22/20", "8/27/20", "12/29/20", "9/29/20", "10…
## $ COVID_Post <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ Risk_Factor <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ Social_Disparities <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Debunk_MI <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Uncertainty_Reduction <dbl> 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, …
## $ Testing <dbl> 88, 88, 88, 88, 88, 88, 88, 88, 1, 88, 88, 88, 8…
## $ Vaccine <dbl> 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, …
## $ Mental_8_YesNo <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Emotion_9_Number <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Praise_10_Number <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ Pop_11_Number <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Language_12_Number <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, …
## $ Pre_Action <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, …
## $ Positive_eff <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 2, …
## $ Negative_eff <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Individual_eff <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Collective_eff <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 2, …
## $ State_recode <dbl> 5, 5, 5, 5, 5, 5, 2, 30, 39, 5, 5, 9, 6, 51, 51,…
## $ Gov_poli <dbl> 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, …
## $ Trifecta <dbl> 2, 2, 2, 2, 2, 2, 3, 2, 1, 2, 2, 1, 2, 2, 2, 3, …
## $ White_percent <dbl> 36.5, 36.5, 36.5, 36.5, 36.5, 36.5, 60.2, 54.6, …
## $ Minority_percent <dbl> 63.5, 63.5, 63.5, 63.5, 63.5, 63.5, 39.8, 45.4, …
## $ Poverty_percent <dbl> 11.8, 11.8, 11.8, 11.8, 11.8, 11.8, 10.2, 9.1, 1…
## $ Prosperity_ranking <dbl> 25, 25, 25, 25, 25, 25, 39, 12, 18, 25, 25, 31, …
## $ year <dbl> 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, …
## $ month <dbl> 9, 8, 12, 9, 10, 12, 4, 12, 12, 3, 4, 4, 12, 4, …
## $ day <dbl> 22, 27, 29, 29, 6, 31, 2, 1, 20, 5, 22, 17, 22, …
## $ state <chr> "CA", "CA", "CA", "CA", "CA", "CA", "AK", "NJ", …
## $ cases <dbl> 2860, 5352, 34167, 3143, 2602, 32295, 3, 4606, 0…
## $ New_Risk <dbl> 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, …
## $ New_Eff <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, …
table(df1$Pre_Action)
##
## 0 1
## 1056 8811
table(df1$New_Risk)
##
## 0 1
## 392 9475
table(df1$New_Eff)
##
## 0 1
## 5801 4066
df2 <- df1 %>%
#mutate_at(vars(Pre_Action, New_Risk, New_Eff), ~ifelse(. == 0, 0, 1)) %>
mutate(Trifecta = as.factor(Trifecta))
df2
## # A tibble: 9,867 × 34
## Post_oldid Real_ID date COVID_Post Risk_Factor Social_Disparities Debunk_MI
## <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 2069 9/22/… 1 0 0 0
## 2 2 2070 8/27/… 1 0 0 0
## 3 3 2071 12/29… 1 0 0 0
## 4 4 2072 9/29/… 1 0 0 0
## 5 5 2073 10/6/… 1 0 0 0
## 6 6 2074 12/31… 1 0 0 0
## 7 7 403 4/2/20 1 0 0 0
## 8 8 5425 12/1/… 1 0 0 0
## 9 9 5842 12/20… 1 0 0 0
## 10 10 2075 3/5/20 1 0 0 0
## # ℹ 9,857 more rows
## # ℹ 27 more variables: Uncertainty_Reduction <dbl>, Testing <dbl>,
## # Vaccine <dbl>, Mental_8_YesNo <dbl>, Emotion_9_Number <dbl>,
## # Praise_10_Number <dbl>, Pop_11_Number <dbl>, Language_12_Number <dbl>,
## # Pre_Action <dbl>, Positive_eff <dbl>, Negative_eff <dbl>,
## # Individual_eff <dbl>, Collective_eff <dbl>, State_recode <dbl>,
## # Gov_poli <dbl>, Trifecta <fct>, White_percent <dbl>, …
glimpse(df2)
## Rows: 9,867
## Columns: 34
## $ Post_oldid <chr> "1", "2", "3", "4", "5", "6", "7", "8", "9", "10…
## $ Real_ID <dbl> 2069, 2070, 2071, 2072, 2073, 2074, 403, 5425, 5…
## $ date <chr> "9/22/20", "8/27/20", "12/29/20", "9/29/20", "10…
## $ COVID_Post <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ Risk_Factor <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ Social_Disparities <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Debunk_MI <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Uncertainty_Reduction <dbl> 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, …
## $ Testing <dbl> 88, 88, 88, 88, 88, 88, 88, 88, 1, 88, 88, 88, 8…
## $ Vaccine <dbl> 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, …
## $ Mental_8_YesNo <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Emotion_9_Number <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Praise_10_Number <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, …
## $ Pop_11_Number <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Language_12_Number <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, …
## $ Pre_Action <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, …
## $ Positive_eff <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 2, …
## $ Negative_eff <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Individual_eff <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Collective_eff <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 2, …
## $ State_recode <dbl> 5, 5, 5, 5, 5, 5, 2, 30, 39, 5, 5, 9, 6, 51, 51,…
## $ Gov_poli <dbl> 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, …
## $ Trifecta <fct> 2, 2, 2, 2, 2, 2, 3, 2, 1, 2, 2, 1, 2, 2, 2, 3, …
## $ White_percent <dbl> 36.5, 36.5, 36.5, 36.5, 36.5, 36.5, 60.2, 54.6, …
## $ Minority_percent <dbl> 63.5, 63.5, 63.5, 63.5, 63.5, 63.5, 39.8, 45.4, …
## $ Poverty_percent <dbl> 11.8, 11.8, 11.8, 11.8, 11.8, 11.8, 10.2, 9.1, 1…
## $ Prosperity_ranking <dbl> 25, 25, 25, 25, 25, 25, 39, 12, 18, 25, 25, 31, …
## $ year <dbl> 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, …
## $ month <dbl> 9, 8, 12, 9, 10, 12, 4, 12, 12, 3, 4, 4, 12, 4, …
## $ day <dbl> 22, 27, 29, 29, 6, 31, 2, 1, 20, 5, 22, 17, 22, …
## $ state <chr> "CA", "CA", "CA", "CA", "CA", "CA", "AK", "NJ", …
## $ cases <dbl> 2860, 5352, 34167, 3143, 2602, 32295, 3, 4606, 0…
## $ New_Risk <dbl> 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, …
## $ New_Eff <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, …
table(df2$Pre_Action)
##
## 0 1
## 1056 8811
table(df2$New_Risk)
##
## 0 1
## 392 9475
table(df2$New_Eff)
##
## 0 1
## 5801 4066
#CTA
library(lmtest)
model1 <- glm(Pre_Action ~ Trifecta + Minority_percent + Poverty_percent,
family = binomial,
data = df2)
summary(model1)
##
## Call:
## glm(formula = Pre_Action ~ Trifecta + Minority_percent + Poverty_percent,
## family = binomial, data = df2)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.260297 0.173202 7.276 3.43e-13 ***
## Trifecta2 -0.356837 0.092199 -3.870 0.000109 ***
## Trifecta3 0.119049 0.102382 1.163 0.244915
## Minority_percent 0.016147 0.002765 5.841 5.20e-09 ***
## Poverty_percent 0.032139 0.014402 2.232 0.025642 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 6714.4 on 9866 degrees of freedom
## Residual deviance: 6651.7 on 9862 degrees of freedom
## AIC: 6661.7
##
## Number of Fisher Scoring iterations: 5
model_parameters(model1, exponentiate = TRUE)
## Parameter | Odds Ratio | SE | 95% CI | z | p
## ------------------------------------------------------------------------
## (Intercept) | 3.53 | 0.61 | [2.51, 4.95] | 7.28 | < .001
## Trifecta [2] | 0.70 | 0.06 | [0.58, 0.84] | -3.87 | < .001
## Trifecta [3] | 1.13 | 0.12 | [0.92, 1.38] | 1.16 | 0.245
## Minority percent | 1.02 | 2.81e-03 | [1.01, 1.02] | 5.84 | < .001
## Poverty percent | 1.03 | 0.01 | [1.00, 1.06] | 2.23 | 0.026
##
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
## computed using a Wald z-distribution approximation.
plot1 <- ggpredict(M2b1_a, “Trifecta”) |> plot() + theme_apa() + labs(title = “Predicted POE by GT”, x = “GT”, y = “POE”) + scale_x_discrete(labels = c(“No”, “Yes”))
library(ggeffects)
library(ggplot2)
library(cowplot)
# Create individual ggpredict plots and assign them to variables
plot1 <- ggpredict(model1, "Trifecta") |> plot() + theme_apa() +
labs(title = "Predicted Call-to-action by Government Trifecta",
x = "Government Trifecta",
y = "Call-to-action") +
scale_x_discrete(labels = c("No", "Yes"))
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
plot2 <- ggpredict(model1, terms = "Minority_percent [all]") |> plot() + theme_apa() +
labs(title = "Predicted CTA by Minority Percentage",
x = "Minority Percentage %",
y = "Call-to-action")
plot3 <- ggpredict(model1, terms = "Poverty_percent [all]") |> plot() + theme_apa() +
labs(title = "Predicted CTA by Poverty Percentage",
x = "Poverty Percentage %",
y = "Call-to-action")
# Arrange the plots in a grid
combined_plot1 <- plot_grid(plot1, plot2, plot3, ncol = 1)
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family
## 'Times New Roman' not found in PostScript font database
# Print the combined plot
print(combined_plot1)
# Save the combined boxplot as a PNG file
ggsave("New_SED_CTA.png", plot = combined_plot1, width = 8, height = 4, units = "in")
#Risk
model2<- glm(New_Risk ~ Trifecta + Minority_percent + Poverty_percent,
family = binomial,
data = df2)
summary(model2)
##
## Call:
## glm(formula = New_Risk ~ Trifecta + Minority_percent + Poverty_percent,
## family = binomial, data = df2)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.535020 0.267777 16.936 < 2e-16 ***
## Trifecta2 -0.868950 0.153664 -5.655 1.56e-08 ***
## Trifecta3 -0.041518 0.188737 -0.220 0.826
## Minority_percent 0.005354 0.004503 1.189 0.235
## Poverty_percent -0.089013 0.021706 -4.101 4.12e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3297.2 on 9866 degrees of freedom
## Residual deviance: 3226.3 on 9862 degrees of freedom
## AIC: 3236.3
##
## Number of Fisher Scoring iterations: 6
model_parameters(model2, exponentiate = TRUE)
## Parameter | Odds Ratio | SE | 95% CI | z | p
## ---------------------------------------------------------------------------
## (Intercept) | 93.23 | 24.96 | [55.25, 157.91] | 16.94 | < .001
## Trifecta [2] | 0.42 | 0.06 | [ 0.31, 0.57] | -5.65 | < .001
## Trifecta [3] | 0.96 | 0.18 | [ 0.67, 1.41] | -0.22 | 0.826
## Minority percent | 1.01 | 4.53e-03 | [ 1.00, 1.01] | 1.19 | 0.235
## Poverty percent | 0.91 | 0.02 | [ 0.88, 0.95] | -4.10 | < .001
##
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
## computed using a Wald z-distribution approximation.
# Create individual ggpredict plots and assign them to variables
plot1 <- ggpredict(model2, "Trifecta") |> plot() + theme_apa() +
labs(title = "Predicted Risks by Government Trifecta",
x = "Government Trifecta",
y = "Risks") +
scale_x_discrete(labels = c("No", "Yes"))
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
plot2 <- ggpredict(model2, terms = "Minority_percent [all]") |> plot() + theme_apa() +
labs(title = "Predicted Risks by Minority Percentage",
x = "Minority Percentage %",
y = "Risks")
plot3 <- ggpredict(model2, terms = "Poverty_percent [all]") |> plot() + theme_apa() +
labs(title = "Predicted Risks by Poverty Percentage",
x = "Poverty Percentage %",
y = "Risks")
# Arrange the plots in a grid
combined_plot2 <- plot_grid(plot2, plot3, ncol = 1)
# Print the combined plot
print(combined_plot2)
# Save the combined boxplot as a PNG file
ggsave("New_SED_Risks.png", plot = combined_plot2, width = 8, height = 4, units = "in")
#Expectancy
model3<- glm(New_Eff ~ Trifecta + Minority_percent + Poverty_percent,
family = binomial,
data = df2)
summary(model3)
##
## Call:
## glm(formula = New_Eff ~ Trifecta + Minority_percent + Poverty_percent,
## family = binomial, data = df2)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.277426 0.112347 -2.469 0.0135 *
## Trifecta2 -0.646443 0.058829 -10.988 < 2e-16 ***
## Trifecta3 -0.321824 0.065790 -4.892 1.00e-06 ***
## Minority_percent 0.031020 0.001783 17.401 < 2e-16 ***
## Poverty_percent -0.075326 0.009313 -8.088 6.04e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 13372 on 9866 degrees of freedom
## Residual deviance: 13045 on 9862 degrees of freedom
## AIC: 13055
##
## Number of Fisher Scoring iterations: 4
model_parameters(model3, exponentiate = TRUE)
## Parameter | Odds Ratio | SE | 95% CI | z | p
## -------------------------------------------------------------------------
## (Intercept) | 0.76 | 0.09 | [0.61, 0.94] | -2.47 | 0.014
## Trifecta [2] | 0.52 | 0.03 | [0.47, 0.59] | -10.99 | < .001
## Trifecta [3] | 0.72 | 0.05 | [0.64, 0.82] | -4.89 | < .001
## Minority percent | 1.03 | 1.84e-03 | [1.03, 1.04] | 17.40 | < .001
## Poverty percent | 0.93 | 8.64e-03 | [0.91, 0.94] | -8.09 | < .001
##
## Uncertainty intervals (profile-likelihood) and p-values (two-tailed)
## computed using a Wald z-distribution approximation.
# Create individual ggpredict plots and assign them to variables
plot1 <- ggpredict(model3, "Trifecta") |> plot() + theme_apa() +
labs(title = "Predicted Outcome Expectancy by Government Trifecta",
x = "Government Trifecta",
y = "Outcome Expectancy") +
scale_x_discrete(labels = c("No", "Yes"))
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
plot2 <- ggpredict(model3, terms = "Minority_percent [all]") |> plot() + theme_apa() +
labs(title = "Predicted Outcome Expectancy by
Minority Percentage",
x = "Minority Percentage %",
y = "Outcome Expectancy")
plot3 <- ggpredict(model3, terms = "Poverty_percent [all]") |> plot() + theme_apa() +
labs(title = "Predicted Outcome Expectancy by
Poverty Percentage",
x = "Poverty Percentage %",
y = "Outcome Expectancy")
# Arrange the plots in a grid
combined_plot3 <- plot_grid(plot1, plot2, plot3, ncol = 1)
# Print the combined plot
print(combined_plot3)
# Save the combined boxplot as a PNG file
ggsave("New_SED_Expectancy.png", plot = combined_plot3, width = 8, height = 4, units = "in")
===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE===o0o===END HERE
modela <- glm(Pre_Action ~ cases, family = binomial, data = df2) summary(modela)
modelb <- glm(New_Risk ~ cases, family = binomial, data = df2) summary(modelb)
modelc <- glm(New_Eff ~ cases, family = binomial, data = df2) summary(modelc)